gusucode.com > 《MATLAB图像与视频处理实用案例详解》代码 > 《MATLAB图像与视频处理实用案例详解》代码/第 19 章 基于语音识别的信号灯图像模拟控制技术/voicebox/voicebox.m
function y=voicebox(f,v) %VOICEBOX set global parameters for Voicebox functions Y=(FIELD,VAL) % % Inputs: F is a field name % V is a new value for the field % % Outputs: Y is set equal to the structure of parameters if the % f and v inputs are both present or both absent. If only % input f is specified, then y is set to the value of the % corresponding field or null if it doesn't exist. % % This routine contains default values for constants that are used by % other functions in the VOICEBOX toolbox. Values in the first section below, % entitled "System-dependent directory paths" should be set as follows: % PP.dir_temp directory for storing temporary files % PP.dir_data default directory to preappend to speech data file names % when the "d" option is specified in READWAV etc. % PP.shorten location of SHORTEN executable. SHORTEN is a proprietary file compression % algorithm that is used for some SPHERE-format files. READSPH % will try to call an external decoder if it is asked to % read such a compressed file. % PP.sfsbin location of Speech Filing Sysytem binaries. If the "c" option % is given to READSFS, it will try to create a requested item % if it is not present in the SFS file. This parameter tells it % where to find the SFS executables. % PP.sfssuffix suffix for Speech Filing Sysytem binaries. READSFS uses this paremeter % to create the name of an SFS executable (see PP.sfsbin above). % Other values defined in this routine are the defaults for specific algorithm constants. % If you want to change these, please refer to the individual routines for a fuller description. % Bugs/Suggestions % (1) Could allow a * at the end of F to act as a wildcard and return/print a part structure % Copyright (C) Mike Brookes 2003 % Version: $Id: voicebox.m,v 1.17 2011/07/28 06:48:45 dmb Exp $ % % VOICEBOX is a MATLAB toolbox for speech processing. % Home page: http://www.ee.ic.ac.uk/hp/staff/dmb/voicebox/voicebox.html % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 2 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You can obtain a copy of the GNU General Public License from % http://www.gnu.org/copyleft/gpl.html or by writing to % Free Software Foundation, Inc.,675 Mass Ave, Cambridge, MA 02139, USA. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% persistent PP if isempty(PP) % System-dependent directory paths and constants PP.dir_temp='F:\TEMP'; % directory for storing temporary files PP.dir_data='E:\dmb\data\speech'; % default directory to preappend to speech data file names PP.shorten='C:\bin\shorten.exe'; % location of shorten executable PP.flac='C:\bin\flac.exe'; % location of flac executable PP.sfsbin='F:\Program Files\SFS\Program'; % location of Speech Filing Sysytem binaries PP.sfssuffix='.exe'; % suffix for Speech Filing Sysytem binaries PP.memsize=50e6; % Maximum amount of temporary memory to use (Bytes) % DYPSA glottal closure identifier PP.dy_cpfrac=0.3; % presumed closed phase fraction of larynx cycle PP.dy_cproj=0.2; % cost of projected candidate PP.dy_cspurt=-0.45; % cost of a talkspurt PP.dy_dopsp=1; % Use phase slope projection (1) or not (0)? PP.dy_ewdly=0.0008; % window delay for energy cost function term [~ energy peak delay from closure] (sec) PP.dy_ewlen=0.003; % window length for energy cost function term (sec) PP.dy_ewtaper=0.001; % taper length for energy cost function window (sec) PP.dy_fwlen=0.00045; % window length used to smooth group delay (sec) PP.dy_fxmax=500; % max larynx frequency (Hz) PP.dy_fxmin=50; % min larynx frequency (Hz) PP.dy_fxminf=60; % min larynx frequency (Hz) [used for Frobenius norm only] PP.dy_gwlen=0.0030; % group delay evaluation window length (sec) PP.dy_lpcdur=0.020; % lpc analysis frame length (sec) PP.dy_lpcn=2; % lpc additional poles PP.dy_lpcnf=0.001; % lpc poles per Hz (1/Hz) PP.dy_lpcstep=0.010; % lpc analysis step (sec) PP.dy_nbest=5; % Number of NBest paths to keep PP.dy_preemph=50; % pre-emphasis filter frequency (Hz) (to avoid preemphasis, make this very large) PP.dy_spitch=0.2; % scale factor for pitch deviation cost PP.dy_wener=0.3; % DP energy weighting PP.dy_wpitch=0.5; % DP pitch weighting PP.dy_wslope=0.1; % DP group delay slope weighting PP.dy_wxcorr=0.8; % DP cross correlation weighting PP.dy_xwlen=0.01; % cross-correlation length for waveform similarity (sec) % RAPT pitch tracker PP.rapt_f0min=50; % Min F0 (Hz) PP.rapt_f0max=500; % Max F0 (Hz) PP.rapt_tframe=0.01; % frame size (s) PP.rapt_tlpw=0.005; % low pass filter window size (s) PP.rapt_tcorw=0.0075; % correlation window size (s) PP.rapt_candtr=0.3; % minimum peak in NCCF PP.rapt_lagwt=0.3; % linear lag taper factor PP.rapt_freqwt=0.02; % cost factor for F0 change PP.rapt_vtranc=0.005; % fixed voice-state transition cost PP.rapt_vtrac=0.5; % delta amplitude modulated transition cost PP.rapt_vtrsc=0.5; % delta spectrum modulated transition cost PP.rapt_vobias=0.0; % bias to encourage voiced hypotheses PP.rapt_doublec=0.35; % cost of exact doubling or halving PP.rapt_absnoise=0; % absolute rms noise level PP.rapt_relnoise=2; % rms noise level relative to noise floor PP.rapt_signoise=0.001; % ratio of peak signal rms to noise floor (0.001 = 60dB) PP.rapt_ncands=20; % max hypotheses at each frame PP.rapt_trms=0.03; % window length for rms measurement PP.rapt_dtrms=0.02; % window spacing for rms measurement PP.rapt_preemph=-7000; % s-plane position of preemphasis zero PP.rapt_nfullag=7; % number of full lags to try (must be odd) % now check some of the key values for validity if exist(PP.dir_temp)~=7 % check that temp directory exists PP.dir_temp = winenvar('temp'); % else use windows temp directory end [fnp,fnn,fne]=fileparts(mfilename('fullpath')); if exist(PP.shorten)~=2 % check that shorten executable exists PP.shorten=fullfile(fnp,'shorten.exe'); % next try local directory if exist(PP.shorten)~=2 % check if it exists in local directory PP.shorten='shorten.exe'; % finally assume it is on the search path end end if exist(PP.flac)~=2 % check that flac executable exists PP.flac=fullfile(fnp,'flac.exe'); % next try local directory if exist(PP.flac)~=2 % check if it exists in local directory PP.shorten='flac.exe'; % finally assume it is on the search path end end end if nargin==0 if nargout==0 % list all fields nn=sort(fieldnames(PP)); cnn=char(nn); fprintf('%d Voicebox parameters:\n',length(nn)); for i=1:length(nn); if ischar(PP.(nn{i})) fmt=' %s = %s\n'; else fmt=' %s = %g\n'; end fprintf(fmt,cnn(i,:),PP.(nn{i})); end else y=PP; end elseif nargin==1 if isfield(PP,f) y=PP.(f); else y=[]; end else if isfield(PP,f) PP.(f)=v; y=PP; else error(sprintf('''%s'' is not a valid voicebox field name',f)); end end